# Replication code for Analysis of Secondary Migration (Part B)
# 07/01/2020
# all data sourced here is created by code_analysis_partA.do

# reshapes data for gravity models ----------------------------------------------------
rm(list=ls())
library(foreign)
library(reshape2)

## helper function
makedyadic <- function(filename,includedstates){
  
  # load data
  d <- read.csv(paste(filename,".csv",sep=""))
  if(min(dim(d))<=2){
    return(NA)
  }
  
  # remobe Xs
  colnames(d) <- gsub("X.","",colnames(d))
  
  # remove = 
  for(i in 1:ncol(d)){
    d[,i] <- gsub("=","",d[,i])
  }
  
  # remove b
  d <- d[-1,]
  
  # get rownames rstate
  rownames(d) <- d[,1]
  d <- d[,-1]
  
  # limit to allowable states
  d <- d[,colnames(d) %in% includedstates]
  d <- d[rownames(d)  %in% includedstates,]
  
  # make numeric
  for(i in 1:ncol(d)){
    d[,i] <-as.numeric(d[,i])
  }
  
  d$arrivals <- apply(d,1,sum)  
  d$rstate   <- rownames(d)
  
  dd <- melt(d,id=c("rstate","arrivals"))
  names(dd)[3] <- "lstate"
  names(dd)[4] <- "movers"
  
  dd$rstate   <- as.character(dd$rstate)
  dd$lstate   <- as.character(dd$lstate)
  
  return(dd)
  
}

### by state-origin dyads for big states

# big states
includedstates <- c("AZ","CA","CO","CT","FL","GA","IA","ID","IL","IN","KS","KY",
                    "LA","MA","MD","ME","MI","MN","MO","NC","ND","NE","NH","NJ",
                    "NV","NY","OH","OK","OR","PA","RI","SD","TN","TX","UT","VA",
                    "VT","WA","WI")

# set up an empty dataset 
dfill <- makedyadic("crosstab_rtolstate",includedstates=includedstates)[1,] 
dfill$year <- NA
dfill$rnat <- NA
dfill <- dfill[0,]

years   <- c(2001:2014,99)
rnat    <- c(1:10,12:16)

for(k in 1:length(rnat)){
 for(i in 1:length(years)){
 cat(i,k)
  
  filename <- paste("crosstab_rtolstate_rnat",rnat[k],"_rarrivefy_n",years[i],".csv",sep="")

  if(filename %in% list.files() ){
  d <- makedyadic(paste("crosstab_rtolstate_rnat",rnat[k],"_rarrivefy_n",years[i],sep=""),includedstates=includedstates)
  if(sum(is.na(d))!=1){
  d$year <- years[i]
  if(years[i]==99){d$year<-2000}
  d$rnat <- rnat[k]
  dfill <- rbind(dfill,d)
  }
  } else {
  stop()
 }
 }
}

# save
write.dta(dfill,file="dyadmovesorigin.dta")

## with all states

# all states
data(state)
includedstates <- state.abb

dfill <- makedyadic("crosstab_rtolstate",includedstates=includedstates)[1,]
dfill$year <- NA
dfill$rnat <- NA
dfill <- dfill[0,]

years   <- c(2001:2014,99)
rnat    <- c(1:10,12:16)

for(k in 1:length(rnat)){
  for(i in 1:length(years)){
  
    cat(i,k)
    filename <- paste("crosstab_rtolstate_rnat",rnat[k],"_rarrivefy_n",years[i],".csv",sep="")
    if(filename %in% list.files() ){
      d <- makedyadic(paste("crosstab_rtolstate_rnat",rnat[k],"_rarrivefy_n",years[i],sep=""),includedstates=includedstates)
      if(sum(is.na(d))!=1){
        d$year <- years[i]
        if(years[i]==99){d$year<-2000}
        d$rnat <- rnat[k]
        dfill <- rbind(dfill,d)
      }
    } else {
      stop()
    }
  }
}
# save
write.dta(dfill,file="dyadmovesorigin_allstates.dta")

### by state-origin dyads with 24 months LPR cutoff

# big states
includedstates <- c("AZ","CA","CO","CT","FL","GA","IA","ID","IL","IN","KS","KY",
                    "LA","MA","MD","ME","MI","MN","MO","NC","ND","NE","NH","NJ",
                    "NV","NY","OH","OK","OR","PA","RI","SD","TN","TX","UT","VA",
                    "VT","WA","WI")

dfill <- makedyadic("crosstab_rtolstate",includedstates=includedstates)[1,]
dfill$year <- NA
dfill$rnat <- NA
dfill <- dfill[0,]

years   <- c(2001:2014,99)
rnat    <- c(1:10,12:16)

for(k in 1:length(rnat)){
  for(i in 1:length(years)){
    cat(i,k)
    
    filename <- paste("crosstab_rtolstate24_rnat",rnat[k],"_rarrivefy_n",years[i],".csv",sep="")
    
    if(filename %in% list.files() ){
      d <- makedyadic(paste("crosstab_rtolstate24_rnat",rnat[k],"_rarrivefy_n",years[i],sep=""),includedstates=includedstates)
      if(sum(is.na(d))!=1){
        d$year <- years[i]
        if(years[i]==99){d$year<-2000}
        d$rnat <- rnat[k]
        dfill <- rbind(dfill,d)
      }
    } else {
      stop()
    }
      
  }
}
# save
write.dta(dfill,file="dyadmovesorigin24.dta")


# Figures 1A and B: Percent Moving Out and In and Outflows ------------------------------------
rm(list=ls())

# packages
library(foreign)
library(reshape2)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(circlize)
library(rtf)
library(stringr)

### Figure 1A:  

# load data move matrix (pooled across all years)
d <- read.csv("crosstab_rtolstate.csv")

# remove Xs
colnames(d) <- gsub("X.","",colnames(d))

# remove = 
for(i in 1:ncol(d)){
  d[,i] <- gsub("=","",d[,i])
}

# remove b
d <- d[-1,]

# get rownames rstate
rownames(d) <- d[,1]
d <- d[,-1]

# make numeric
for(i in 1:ncol(d)){
  d[,i] <-as.numeric(d[,i])
}

# remove those refugees arriving in states with fewer than 1000
arrivals <- apply(d,1,sum)
includestates <- names(arrivals[arrivals>1000])
includestates <- c("AZ","CA","CO","CT","FL","GA","IA","ID","IL","IN","KS","KY",
                   "LA","MA","MD","ME","MI","MN","MO","NC","ND","NE","NH","NJ",
                   "NV","NY","OH","OK","OR","PA","RI","SD","TN","TX","UT","VA",
                   "VT","WA","WI")

# restrict to states with more than 1000 arrivals
d <- d[c(rownames(d) %in% includestates), ]

# compute stayers
stayers <- c()
for (i in rownames(d)){
  stayers <- c(stayers,d[which(rownames(d)==i),
                         which(colnames(d)==i)
                         ]
  )
}
st <- data.frame(stayers)
st$state <- rownames(d)
st$stayers==diag(as.matrix(d))

# arrivals 	
arrivals <- apply(d,1,sum)
as       <- data.frame(arrivals)
as$state <- rownames(as)

# green cards
greencard <- apply(d,2,sum)
gs       <- data.frame(greencard)
gs$state <- rownames(gs)

# merge
dd <- merge(as,gs,by="state")
dd <- merge(dd,st,by="state")

# compute net flows
dd$delta    <- dd$greencard - dd$arrivals 
# outflows
dd$leaving  <- dd$arrivals-dd$stayers
# inflows
dd$incoming <- dd$greencard-dd$stayers

# percent moving out
dd$percentout <- 100*(dd$leaving/dd$arrivals)

# percent moving to destination
dd$percenttostate <- 100*(dd$incoming)/sum(dd$incoming)

### Figure 1A: Percent Moving Out 
dd$groupname <- NA
for(i in 1:nrow(dd)){
  dd$groupname[i] <- paste(dd$state[i]," (N=",dd$arrivals[i],")",sep="")
}
dd <- dd[order(dd$percentout),]
dd$groupname <- factor(dd$groupname,levels = unique(dd$groupname))

ggplot(dd,aes(x=groupname,y=percentout)) + geom_bar(stat="identity",position = "dodge",width=0.35,fill="indianred1") + coord_flip() +
  theme_economist(dkpanel = TRUE) + ylim(0,50) +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(size=13),axis.text=element_text(size=13)) + ylab("Moved Out of State (%)")#
ggsave("fig1A.pdf",width=11,height=8)   


### Figure 1C: Scatter of Net in and Outflows
ggplot(dd,aes(x=leaving,y=incoming,label=state)) + geom_point() + 
  geom_label_repel() + scale_x_continuous("Number of Refugees Moving Out of State",limits=c(0,8700)) + 
  scale_y_continuous("Number of Refugees Moving to State",limits=c(0,8700)) + 
  theme_economist(dkpanel = TRUE,horizontal = TRUE) +
  theme(axis.title = element_text(size=13),axis.text=element_text(size=13)) +
  geom_abline(slope=1,intercept=0,col="red")
ggsave("fig1B.pdf",width=11,height=8)


# Figure 1C: Chord of Moving ----------------------------------------------
rm(list=ls())

# load data moving matrix (pooled across all years)
d <- read.csv("crosstab_rtolstate.csv")

# remove Xs
colnames(d) <- gsub("X.","",colnames(d))

# remove = 
for(i in 1:ncol(d)){
  d[,i] <- gsub("=","",d[,i])
}

# remove b
d <- d[-1,]

# get rownames rstate
rownames(d) <- d[,1]
d <- d[,-1]

includestates <- c("AZ","CA","CO","CT","FL","GA","IA","ID","IL","IN","KS","KY",
                   "LA","MA","MD","ME","MI","MN","MO","NC","ND","NE","NH","NJ",
                   "NV","NY","OH","OK","OR","PA","RI","SD","TN","TX","UT","VA",
                   "VT","WA","WI")

d <- d[c(rownames(d) %in% includestates), ]

# make numeric
for(i in 1:ncol(d)){
  d[,i] <-as.numeric(d[,i])
}

# set top states
K <- 8

## reduce matrix to top K plus other

# top K arrival states
arrivals <- apply(d,1,sum)
toparrival <- names(sort(arrivals,decreasing = T))[1:K]
as       <- data.frame(arrivals)
as$state <- rownames(as)

# top K destination states
dest    <- apply(d,2,sum)
topdest <- names(sort(dest,decreasing = T))[1:K]
ds       <- data.frame(dest)
ds$state <- rownames(ds)

# merge
ddd <- merge(as,ds,by="state")

# top K states on abs(net moves)
ddd$net <- ddd$dest-ddd$arrivals
ddd <- ddd[order(abs(ddd$net),decreasing = T),]
topmovers <- ddd$state[1:K]

namesin <- unique(c(toparrival,topdest,topmovers))
K <- length(namesin)
K

# move top K to top K
d1 <- d[(rownames(d) %in% namesin) == T, (colnames(d) %in% namesin) ==T ]

# move top K to other
d2 <- d[(rownames(d) %in% namesin) == T, (colnames(d) %in% namesin) ==F ]
Other <- apply(d2,1,sum)
d1 <- cbind(d1,Other)

# move from other to K
d2 <- d[rownames(d) %in% namesin == F, colnames(d) %in% namesin ==T ]
Other <- apply(d2,2,sum)

# add stayers (other at r and other at l)
Other <- c(Other,sum(d[rownames(d) %in% namesin == F, colnames(d) %in% namesin ==F ]))
names(Other)[K+1] <- "Other"

d1 <- rbind(d1,Other)
rownames(d1)[K+1] <- "Other"

# prep for plotting
d <- d1
d$rstate <- rownames(d)
dd <- melt(d,id="rstate")
names(dd)[2] <- "lstate"
dd$id <- 1:nrow(dd)

dd$rstate  <- as.character(dd$rstate)
dd$lstate   <- as.character(dd$lstate)
dd$stayer <- dd$rstate == dd$lstate

dd$From <- as.character(dd$rstate)
dd$To   <- as.character(dd$lstate)
dd$Weight <- dd$value

pal1 <- c(AZ="#A6CEE3", CA="#1F78B4", GA="#B2DF8A", IA="#33A02C", ID="#FB9A99",KS="#E31A1C", 
          MI="#FF7F00" , MN="#FDBF6F", NE="#CAB2D6", NY="#6A3D9A",
          PA="gold" , TX="#FFFF99", WA="#B15928",Other="grey90")

pal <- pal1

states <- unique(dd$rstate)
grid.col2  <- pal[1:length(states)]
names(grid.col2 ) <- states

pdf(paste("fig1C_min10.pdf",sep=""),width=9,height=9)

circos.clear()
circos.par(start.degree = 90, clock.wise = TRUE)
chordDiagram(dd[dd$stayer==0,],directional = 1,self.link = 1,transparency = .1,diffHeight = uh(4, "mm"),
             grid.col = grid.col2,link.visible=dd$value>=10,
             annotationTrack = c("axis","grid"), 
             preAllocateTracks = list(track.height = max(strwidth(unlist(dimnames(d))))))


circos.track(track.index = 1, panel.fun = function(x, y) {
  circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, 
              facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
}, bg.border = NA)
title(main = paste("State to State Movement",sep=""))
circos.clear()

dev.off() 


# Figure 2: Push Model ----------------------------------------------------
rm(list=ls())

## helper functions

# function to insert row in a given position r
insertRow2 <- function(df, nr, r) {
  df <- rbind(df,nr)
  df <- df[order(c(1:(nrow(df)-1),r-0.5)),]
  row.names(df) <- 1:nrow(df)
  return(df)  
}

# function to add rows above and or below pos1 and pos2 and label with group name
filler <- function(pos1=NA,pos2=NA,above=1,below=1,df=NA,nr=NA,gname=NA){
  
  # add rows above pos1 
  if(above>0){
    for(i in 1:above){
      df <- insertRow2(df=df,nr=nr,r=which(df$var==pos1))
    }
  }
  # add rows below pos1 or pos2 if latter is supplied
  if(below>0){
    # if pos2 is not suppied use pos1
    if(is.na(pos2)){
      for(i in 1:below){
        df <- insertRow2(df=df,nr=nr,r=which(df$var==pos1)+1)
      }
    } else {
      for(i in 1:below){
        df <- insertRow2(df=df,nr=nr,r=which(df$var==pos2)+1)
      }
    }
  }
  
  # add group name
  if(is.na(gname)==FALSE){  
    if(is.na(pos2)){
      df$group[(which(df$var==pos1)-above):(which(df$var==pos1)+below)] <- gname
    } else {
      df$group[(which(df$var==pos1)-above):(which(df$var==pos2)+below)] <- gname
    }
  }
  return(df)  
}

plotname <- "reg_movedrtol_bigstates_x1b"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# relationship
d <- filler(pos1="CHILD",pos2="SPOUSE",gname="Relationship",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Relationship"] <- 
  c("Relationship:",
    paste(offset,c("Principal Applicant","Child","Other","Parent","Sibling","Spouse")),
    paste(rep(" ",1),collapse=""))

# gender
d <- filler(pos1="F",gname="Gender",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Gender"] <- c("Female",paste(rep(" ",2),collapse=""))

# education
d <- filler(pos1="Less than Secondary",pos2="University",gname="Education",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Education"] <- 
  c("Education:",
    paste(offset,c("None/Unknown","Primary","Secondary","Postsecondary","University")),
    paste(rep(" ",3),collapse=""))

# origin
d <- filler(pos1="AFGHANISTAN",pos2="VIETNAM",gname="Nationality",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Nationality"] <- c("Nationality:",
                                   paste(offset,c("Other ","Afghanistan","Bhutan","Bosnia and Herzegovina",
                                                  "Burma","Dem. Rep. of the Congo","Eritrea",
                                                  "Ethiopia","Iran","Iraq","Liberia",
                                                  "Russia","Somalia","Sudan","Ukraine","Vietnam"
                                   )),paste(rep(" ",4),collapse=""))

# us ties
d <- filler(pos1="free_n=1",gname="Ties",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Ties"] <- c("No US Ties",paste(rep(" ",5),collapse=""))

# remove agency
d <- d[ (d$var %in% c("HIAS","CWS","DFMS","ECDC","IOWA","IRC","KHRW","LIRS","RVTV","USCCB","USCRI","WR"))==FALSE ,]

# age
d <- filler(pos1="rage_n=1",pos2="rage_n=5",gname="Age",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Age"] <- c("Arrival Age:",
                           paste(offset,c("18-20","20s","30s","40s","50s","60+")),
                           paste(rep(" ",6),collapse=""))

# remove arrival year
d <- d[-(which(d$var=="rarrivefy_n=2001"):which(d$var=="rarrivefy_n=2014")),]

# remove state
data(state)
d <- d[ (d$var %in% c(state.abb,"DC","GU","PR"))==FALSE ,]

# Case size
d <- filler(pos1="casesize_n=2",pos2="casesize_n=4",gname="Casesize",
            df=d,nr=emptyrow,above=2,below=0)
d$var[d$group=="Casesize"] <- c("Family Size:",
                                paste(offset,c("1","2","3","4+")))

# drop time to LPR
d <- d[-(which(d$var=="grmonthstolpr_receipt=1"):which(d$var=="grmonthstolpr_receipt=4")),]

# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("Low",d$var)] <- 1
d$ref[grep(" Medium Low",d$var)] <- 0
d$ref[grep("Other ",d$var)] <- 1
d$ref[grep("Principal Applicant",d$var)] <- 1
d$ref[grep("None/Unknown",d$var)] <- 1
d$ref[grep("18-20",d$var)] <- 1
d$ref[grep(" 1",d$var)] <- 1
d$ref[grep("2000",d$var)] <- 1

d$cati <- "plain"

d$cati[which(d$var == "Relationship:")] <- "bold"
d$cati[which(d$var == "Education:")] <- "bold"
d$cati[which(d$var == "Female")] <- "bold"
d$cati[which(d$var == "Nationality:")] <- "bold"
d$cati[which(d$var == "No US Ties")] <- "bold"
d$cati[which(d$var == "Arrival Age:")] <- "bold"
d$cati[which(d$var == "Arrival Year:")] <- "bold"
d$cati[which(d$var == "Family Size:")] <- "bold"

d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.32,.32))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Change in Probability of Moving Out of Arrival State",
                           breaks=round(seq(-.4,.4,.2),1),labels=c("-.4","-.2","0",".2",".4"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black", hjust = 0 , vjust=.5 ),
      axis.text.x =       element_text(size = base_size , colour = "black", hjust = 0 , vjust=.5 ), # changes position of X axis text
      #  axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      legend.position = "none"
    )
}
base_size = 10
base_family = ""
face=d$cati[nrow(d):1]
p <- p + theme_economist(dkpanel = T,horizontal=T) + theme(
  axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
  axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
  # axis.ticks =        element_line(colour = "grey50"),
  axis.title.y =      element_text(size = base_size,angle=90),
  legend.position = "none",panel.grid=element_line(size = .1)
) 
p
ggsave(paste("fig2.pdf",sep=""),p,width=8,height=8)



# Figure 3: Gravity Model -------------------------------------------------
plotname <- "gravity_main"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]
d <- d[d$pe!="0",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# Co share
cats <- c("1st quintile","2nd quintile","3rd quintile","4th quintile","5th quintile")

d <- filler(pos1="1.gr_l_coshare",pos2="4.gr_l_coshare",gname="Destination State: Share of Co-nationals",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Share of Co-nationals"] <- c("Share of Co-nationals:"," Destination State",
                                                                paste(offset,paste(cats,paste(rep(" ",1),collapse="")
                                                                )),
                                                                paste(rep(" ",1),collapse=""))

d <- filler(pos1="1.gr_r_coshare",pos2="4.gr_r_coshare",gname="Arrival State: Share of Co-nationals",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Share of Co-nationals"] <- c(" Arrival State",
                                                            paste(offset,paste(cats,paste(rep(" ",2),collapse="")
                                                            )),
                                                            paste(rep(" ",2),collapse=""))

# cost of housing
d <- filler(pos1="1.gr_l_col_housing",pos2="4.gr_l_col_housing",gname="Destination State: Cost of Housing",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Cost of Housing"] <- c("Cost of Housing:"," Destination State ",
                                                          paste(offset,paste(cats,paste(rep(" ",3),collapse="")
                                                          )),
                                                          paste(rep(" ",3),collapse=""))

d <- filler(pos1="1.gr_r_col_housing",pos2="4.gr_r_col_housing",gname="Arrival State: Cost of Housing",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Cost of Housing"] <- c(" Arrival State ",
                                                      paste(offset,paste(cats,paste(rep(" ",4),collapse="")
                                                      )),
                                                      paste(rep(" ",4),collapse=""))

# unemployment 
d <- filler(pos1="1.gr_l_unemployment",pos2="4.gr_l_unemployment",gname="Destination State: Unemployment",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Unemployment"] <- c("Unemployment:"," Destination State  ",
                                                       paste(offset,paste(cats,paste(rep(" ",5),collapse="")
                                                       )),
                                                       paste(rep(" ",5),collapse=""))

d <- filler(pos1="1.gr_r_unemployment",pos2="4.gr_r_unemployment",gname="Arrival State: Unemployment",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Unemployment"] <- c(" Arrival State  ",
                                                   paste(offset,paste(cats,paste(rep(" ",6),collapse="")
                                                   )),
                                                   paste(rep(" ",6),collapse=""))


# welfare spending per poor
d <- filler(pos1="1.gr_l_expend_welfare_poorpop",pos2="4.gr_l_expend_welfare_poorpop",gname="Destination State: Welfare spending",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Welfare spending"] <- c("Welfare spending:"," Destination State   ",
                                                           paste(offset,paste(cats,paste(rep(" ",7),collapse="")
                                                           )),
                                                           paste(rep(" ",7),collapse=""))

d <- filler(pos1="1.gr_r_expend_welfare_poorpop",pos2="4.gr_r_expend_welfare_poorpop",gname="Arrival State: Welfare spending",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Welfare spending"] <- c(" Arrival State   ",
                                                       paste(offset,paste(cats,paste(rep(" ",8),collapse="")
                                                       )),
                                                       paste(rep(" ",8),collapse=""))

# GDP per capita
d <- filler(pos1="1.gr_l_realgdp_percap",pos2="4.gr_l_realgdp_percap",gname="Destination State: GDP per capita",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: GDP per capita"] <- c("GDP per capita:"," Destination State    ",
                                                         paste(offset,paste(cats,paste(rep(" ",9),collapse="")
                                                         )),
                                                         paste(rep(" ",9),collapse=""))

d <- filler(pos1="1.gr_r_realgdp_percap",pos2="4.gr_r_realgdp_percap",gname="Arrival State: GDP per capita",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: GDP per capita"] <- c(" Arrival State    ",
                                                     paste(offset,paste(cats,paste(rep(" ",10),collapse="")
                                                     )),
                                                     paste(rep(" ",10),collapse=""))

# govenor
d <- filler(pos1="l_govparty_dem",gname="Destination State: Governor Democrat",
            df=d,nr=emptyrow,above=1,below=1)
d$var[d$group=="Destination State: Governor Democrat"] <- c("Democratic Governor:"," Destination State     ",paste(rep(" ",11),collapse=""))

# govenor
d <- filler(pos1="r_govparty_dem",gname="Arrival State: Governor Democrat",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Arrival State: Governor Democrat"] <- c(" Arrival State     ",paste(rep(" ",12),collapse=""))


d <- d[-(which(d$var == "1.gr_lnarrivals"): which(d$var =="2014.year")),]


# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("1st quintile",d$var)] <- 1

d$cati <- "plain"
d$cati[grep("Share",d$var)] <- "bold"
d$cati[grep("Cost",d$var)] <- "bold"
d$cati[grep("Unemploy",d$var)] <- "bold"
d$cati[grep("Welfare",d$var)] <- "bold"
d$cati[grep("GDP",d$var)] <- "bold"
d$cati[grep("Democrat",d$var)] <- "bold"

d$cati[grep("State",d$var)] <- "italic"

d$group2 <- "TBA"
d$group2[d$group %in% c("Destination State: Share of Co-nationals",
                        "Arrival State: Share of Co-nationals")] <- "Share of Co-nationals"

d$group2[d$group %in% c("Destination State: Cost of Housing",
                        "Arrival State: Cost of Housing")] <- "Cost of Housing"

d$group2[d$group %in% c("Destination State: Unemployment",
                        "Arrival State: Unemployment")] <- "Unemployment"

d$group2[d$group %in% c("Destination State: Welfare spending",
                        "Arrival State: Welfare spending")] <- "Welfare spending"

d$group2[d$group %in% c("Destination State: GDP per capita",
                        "Arrival State: GDP per capita")] <- "GDP per capita"

d$group2[d$group %in% c("Destination State: Governor Democrat",
                        "Arrival State: Governor Democrat")] <- "Governor Democrat"

d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group2,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.275,.275))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Percent Change in Secondary Migration Between States",
                           breaks=round(seq(-.2,.2,.1),1),labels=c("-20","-10","0","10","20"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black", hjust = 0 , vjust=.5 ),
      axis.text.x =       element_text(size = base_size , colour = "black", hjust = 0 , vjust=.5 ), # changes position of X axis text
      #axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      legend.position = "none"
    )
}
base_size = 10
face=d$cati[nrow(d):1]
p <- p + theme_bw1(base_size = 10, base_family = "",face=d$cati[nrow(d):1])
p <- p +
  theme_economist(dkpanel = T,horizontal=T) + theme(
    axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
    axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
    #axis.ticks =        element_line(colour = "grey50"),
    axis.title.y =      element_text(size = base_size,angle=90),
    legend.position = "none",panel.grid=element_line(size = .1)
  )

ggsave(paste("fig3.pdf",sep=""),p,width=8,height=10)



# SI: Descriptive Stats Table 1 ---------------------------------------------------

# data directory
filenames <- c("casesize","free","rage","rarrivefy","reduccat","relat","rgender","rnat_top")
store <- list()
for(i in 1:length(filenames)){
  d <- read.csv(paste("crosstab_code_",filenames[i],".csv",sep=""),header=T, stringsAsFactors = FALSE)
  temp <- c()
  for(k in 1:nrow(d)){
    temp <- c(rep(d[k,1],d[k,3]),temp)
  }
  store[[filenames[i]]] <- temp
}
d <- read.csv(paste("crosstab_movedrtol_free.csv",sep=""),header=T, stringsAsFactors = FALSE)
d$ones  <- round(d$movedrtol*d$obs,0)
temp <- c(rep(1,sum(d$ones)),rep(0,sum(d$obs)-sum(d$ones)))
store[["moved"]] <- temp
dd <- data.frame(moved = store$moved,
                 casesize=store$casesize,
                 free=store$free,
                 rage=store$rage,
                 rarrivefy=store$rarrivefy,
                 reduccat=store$reduccat,
                 relat=store$relat,
                 rgender=store$rgender,
                 rnat=store$rnat_top
)
dd <- cbind(dd,model.matrix(~-1+dd$rnat))

# family size
dd$famsize1 <- as.numeric(dd$casesize==1)
dd$famsize2 <- as.numeric(dd$casesize==2)
dd$famsize3 <- as.numeric(dd$casesize==3)
dd$famsize4 <- as.numeric(dd$casesize>=4)

# relationship
dd$relat1 <-   as.numeric(dd$relat=="PA")
dd$relat2 <-   as.numeric(dd$relat=="CHILD")
dd$relat3 <-   as.numeric(dd$relat=="OTHER")
dd$relat4 <-   as.numeric(dd$relat=="PARENT")
dd$relat5 <-   as.numeric(dd$relat=="SIBLING")
dd$relat6 <-   as.numeric(dd$relat=="SPOUSE")

# gender
dd$female <- as.numeric(dd$rgender=="F")

# education
dd$educ1 <-   as.numeric(dd$reduccat=="None")
dd$educ2 <-   as.numeric(dd$reduccat=="Less than Secondary")
dd$educ3 <-   as.numeric(dd$reduccat=="Secondary")
dd$educ4 <-   as.numeric(dd$reduccat=="PreUniversity")
dd$educ5 <-   as.numeric(dd$reduccat=="University")

# us ties
dd$ties <- as.numeric(dd$free==1)

# age
dd$age1 <-   as.numeric(dd$rage >=18 & dd$rage<=20)
dd$age2 <-   as.numeric(dd$rage >=21 & dd$rage<=30)
dd$age3 <-   as.numeric(dd$rage >=31 & dd$rage<=40)
dd$age4 <-   as.numeric(dd$rage >=41 & dd$rage<=50)
dd$age5 <-   as.numeric(dd$rage >=51 & dd$rage<=60)
dd$age6 <-   as.numeric(dd$rage >=61 )

getstats <- function(x){c(round(mean(x),2),
                          round(sd(x),2),
                          round(min(x),2),
                          round(max(x),2)
)}

vars <- c("moved",
          paste("famsize",1:4,sep=""),
          paste("relat",1:6,sep=""),
          paste("female"),
          paste("educ",1:5,sep=""),
          paste("ties"),
          paste("age",1:6,sep=""),
          names(dd)[which(names(dd)=="dd$rnatAFGHANISTAN"):which(names(dd)=="dd$rnatVIETNAM")],
          "rarrivefy"
)
out <- t(apply(dd[,vars],2,getstats))
rowvarnames <- c("Moved out of State",
                 paste("Family Size:", c("1","2","3","4+")),
                 paste("Relationship:",c("Principal Applicant","Child","Other","Parent","Sibling","Spouse")),
                 paste("Gender:",c("Female")),
                 paste("Education:",c("None/Unknown","Primary","Secondary","Postsecondary","University")),
                 paste("Ties:",c("No US Ties")),
                 paste("Age: ",c("18-20","20s","30s","40s","50s","60+")),
                 paste("Country:",c("Afghanistan","Bhutan","Bosnia And Herzegovina",
                                    "Burma","Dem. Rep. Congo","Eritrea",
                                    "Ethiopia","Iran","Iraq","Liberia","Other",
                                    "Russia","Somalia","Sudan","Ukraine","Vietnam")),
                 "Year of Arrival"
)
rownames(out) <- rowvarnames 
out         <- data.frame(out)
out$varname <- row.names(out)
out <- out[,c(5,1:4)]
names(out) <- c("Variable","Mean","SD","Min","Max")
rtffile <- RTF("descriptives.doc") 
addParagraph(rtffile, "Descriptive Statistics Push Model:\n")
addTable(rtffile,out)
done(rtffile)

# SI Figures s1, s2, and s3: Match Rates by Arrival State, Origin, and year -------------------------------------------------

rm(list=ls()[!(ls() %in% c("path","code","source","results"))])

# function to prep data for descriptive plots
dataprepdesc <- function(name = NA){
  d <- read.csv(paste(name,sep=""),stringsAsFactors = FALSE)
  colnames(d) <- c("group","mean","obs")
  d$mean <- d$mean*100
  return(d)
}

# by arrival state
setwd(paste(path,source,sep=""))
d <- dataprepdesc("matchrate_match1_r_l_rstate.csv")

# d <- d[d$obs>1000,]

for(i in 1:nrow(d)){
  #d$groupname[i] <- paste(d$group[i]," (N=",d$obs[i],")",sep="")
  d$groupname[i] <- d$group[i]
  
}
N <- length(unique(d$groupname))
d <- d[order(d$mean),]
d$groupname <- factor(d$groupname,levels = unique(d$groupname))
ggplot(d,aes(x=groupname,y=mean)) + geom_bar(stat="identity",position = "dodge",width=0.35,fill="indianred1") + coord_flip() +
  theme_economist(dkpanel = TRUE) + ylim(0,100) +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(size=13),axis.text=element_text(size=13))  + ylab("Matched to LPR Data (%)") #+ ggtitle("Refugee Naturalization Rates by Nationality")
setwd(paste(path,results,sep=""))
ggsave("SIFig_Match_Rates_by_rstate.pdf",width=11,height=8)

# by arrival year
setwd(paste(path,source,sep=""))
d <- dataprepdesc("matchrate_match1_r_l_rarrivefy.csv")

#  d <- d[d$obs>1000,]

for(i in 1:nrow(d)){
  #d$groupname[i] <- paste(d$group[i]," (N=",d$obs[i],")",sep="")
  d$groupname[i] <- d$group[i]
  
}
N <- length(unique(d$groupname))
d <- d[order(d$group),]
d$groupname <- factor(d$groupname,levels = unique(d$groupname))
ggplot(d,aes(x=groupname,y=mean)) + geom_bar(stat="identity",position = "dodge",width=0.35,fill="indianred1") + coord_flip() +
  theme_economist(dkpanel = TRUE) + ylim(0,100) +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(size=13),axis.text=element_text(size=13))  + ylab("Matched to LPR Data (%)") #+ ggtitle("Refugee Naturalization Rates by Nationality")
setwd(paste(path,results,sep=""))
ggsave("SIFig_Match_Rates_by_rarrivefy.pdf",width=11,height=8)

# by origin
setwd(paste(path,source,sep=""))
d <- dataprepdesc("matchrate_match1_r_l_rnat_top.csv")

# d <- d[d$obs>1000,]

for(i in 1:nrow(d)){
  #d$groupname[i] <- paste(d$group[i]," (N=",d$obs[i],")",sep="")
  d$groupname[i] <- str_to_title(d$group[i])
  
}
N <- length(unique(d$groupname))
d <- d[order(d$mean),]
d$groupname <- factor(d$groupname,levels = unique(d$groupname))
ggplot(d,aes(x=groupname,y=mean)) + geom_bar(stat="identity",position = "dodge",width=0.35,fill="indianred1") + coord_flip() +
  theme_economist(dkpanel = TRUE) + ylim(0,100) +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(size=13),axis.text=element_text(size=13))  + ylab("Matched to LPR Data (%)") #+ ggtitle("Refugee Naturalization Rates by Nationality")
setwd(paste(path,results,sep=""))
ggsave("SIFig_Match_Rates_by_origin.pdf",width=11,height=8)

# correlation move rates arrival match rates
setwd(paste(path,source,sep=""))
d <- dataprepdesc("matchrate_match1_r_l_rstate.csv")
# d <- d[d$obs>1000,]
d$state <- d$group

setwd(paste(path,source,sep=""))
d1 <- dataprepdesc("crosstab_movedrtol_rstate.csv")
names(d1) <- c("state","move","obs_move") 

#d1 <- cbind(d,d1)
d1 <- merge(d,d1,by="state")

ggplot(d1,aes(x=mean,y=move,label=group)) + geom_point() + 
  geom_label_repel() + scale_x_continuous("Matched to LPR Data (%)",limits=c(90,100)) + 
  scale_y_continuous("Moved out of State (%)",limits=c(0,70)) + 
  theme_economist(dkpanel = TRUE,horizontal = TRUE) +
  theme(axis.title = element_text(size=13),axis.text=element_text(size=13)) +
  geom_smooth(col="red",method="lm")

setwd(paste(path,results,sep=""))
ggsave("SIFig_match_move_rate.pdf",width=11,height=8)



# SI Figure s4: Push Model with Geo factors ----------------------------------------------------
plotname <- "reg_movedrtol_bigstates_x3b"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# relationship
d <- filler(pos1="CHILD",pos2="SPOUSE",gname="Relationship",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Relationship"] <- 
  c("Relationship:",
    paste(offset,c("Principal Applicant","Child"," Other","Parent","Sibling","Spouse")),
    paste(rep(" ",1),collapse=""))

# gender
d <- filler(pos1="F",gname="Gender",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Gender"] <- c("Female",paste(rep(" ",2),collapse=""))

# education
d <- filler(pos1="Less than Secondary",pos2="University",gname="Education",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Education"] <- 
  c("Education:",
    paste(offset,c("None/Unknown","Primary","Secondary","Postsecondary","University")),
    paste(rep(" ",3),collapse=""))

# origin
d <- filler(pos1="AFGHANISTAN",pos2="UKRAINE",gname="Nationality",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Nationality"] <- c("Nationality:",
                                   paste(offset,c("Vietnam","Afghanistan","Bhutan","Bosnia And Herzegovina",
                                                  "Burma","Dem. Rep. Congo","Eritrea",
                                                  "Ethiopia","Iran","Iraq","Liberia",
                                                  "Russia","Somalia","Sudan","Ukraine"
                                   )),paste(rep(" ",4),collapse=""))

# us ties
d <- filler(pos1="free_n=1",gname="Ties",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Ties"] <- c("No US Ties",paste(rep(" ",5),collapse=""))

# remove agency
d <- d[ (d$var %in% c("HIAS","CWS","DFMS","ECDC","IOWA","IRC","KHRW","LIRS","RVTV","USCCB","USCRI","WR"))==FALSE ,]

# age
d <- filler(pos1="rage_n=1",pos2="rage_n=5",gname="Age",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Age"] <- c("Arrival Age:",
                           paste(offset,c("18-20","20s","30s","40s","50s","60+")),
                           paste(rep(" ",6),collapse=""))

# remove arrival year
d <- d[-(which(d$var=="rarrivefy_n=2001"):which(d$var=="rarrivefy_n=2014")),]


# remove state
data(state)
d <- d[ (d$var %in% c(state.abb,"DC","PR","GU"))==FALSE ,]

# Case size
d <- filler(pos1="casesize_n=2",pos2="casesize_n=4",gname="Casesize",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Casesize"] <- c("Family Size:",
                                paste(offset,c("1","2","3","4+")),paste(rep(" ",8),collapse=""))

d <- d[-(which(d$var=="grmonthstolpr_receipt=1"):which(d$var=="grmonthstolpr_receipt=4")),]

# Co share
cats <- c("1st quintile","2nd quintile","3rd quintile","4th quintile","5th quintile")

d <- filler(pos1="grcoshare=1",pos2="grcoshare=4",gname="Share of Co-nationals",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Share of Co-nationals"] <- c("Share of Co-nationals:",
                                             paste(offset,paste(cats,paste(rep(" ",0),collapse="")
                                             )),
                                             paste(rep(" ",9),collapse=""))

# cost of housing
d <- filler(pos1="grcol_housing=1",pos2="grcol_housing=4",gname="Cost of Housing",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Cost of Housing"] <- c("Cost of Housing:",
                                       paste(offset,paste(cats,paste(rep(" ",2),collapse="")
                                       )),
                                       paste(rep(" ",10),collapse=""))

# unemployment
d <- filler(pos1="grunemployment=1",pos2="grunemployment=4",gname="Unemployment",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Unemployment"] <- c("Unemployment:",
                                    paste(offset,paste(cats,paste(rep(" ",3),collapse="")
                                    )),
                                    paste(rep(" ",11),collapse=""))

# welfare spending per poor
d <- filler(pos1="grexpend_welfare_poorpop=1",pos2="grexpend_welfare_poorpop=4",gname="Welfare spending",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Welfare spending"] <- c("Welfare spending per poor:",
                                        paste(offset,paste(cats,paste(rep(" ",4),collapse="")
                                        )),
                                        paste(rep(" ",12),collapse=""))

# GDP per capita
d <- filler(pos1="grrealgdp_percap=1",pos2="grrealgdp_percap=4",gname="GDP per capita",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="GDP per capita"] <- c("GDP per capita:",
                                      paste(offset,paste(cats,paste(rep(" ",5),collapse="")
                                      )),
                                      paste(rep(" ",13),collapse=""))

# govenor
d <- filler(pos1="1 if Governor is a Democrat",gname="Governor",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Governor"] <- c("Democratic Governor:",paste(rep(" ",14),collapse=""))

# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("Low",d$var)] <- 1
d$ref[grep(" Medium Low",d$var)] <- 0
d$ref[grep("Vietnam",d$var)] <- 1
d$ref[grep("Principal Applicant",d$var)] <- 1
d$ref[grep("None/Unknown",d$var)] <- 1
d$ref[grep("13-19",d$var)] <- 1
d$ref[grep(" 1",d$var)] <- 1
d$ref[grep("2000",d$var)] <- 1

d$cati <- "plain"

d$cati[which(d$var == "Relationship:")] <- "bold"
d$cati[which(d$var == "Education:")] <- "bold"
d$cati[which(d$var == "Female")] <- "bold"
d$cati[which(d$var == "Nationality:")] <- "bold"
d$cati[which(d$var == "No US Ties")] <- "bold"
d$cati[which(d$var == "Arrival Age:")] <- "bold"
d$cati[which(d$var == "Arrival Year:")] <- "bold"
d$cati[which(d$var == "Family Size:")] <- "bold"

d$cati[which(d$var == "Share of Co-nationals:")] <- "bold"
d$cati[which(d$var == "Cost of Housing:")] <- "bold"
d$cati[which(d$var == "Unemployment:")] <- "bold"
d$cati[which(d$var == "Welfare spending per poor:")] <- "bold"  
d$cati[which(d$var == "GDP per capita:")] <- "bold"  
d$cati[which(d$var == "Democratic Governor:")] <- "bold"  



d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.35,.35))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Change in Probability of Moving Out of Arrival State",
                           breaks=round(seq(-.4,.4,.2),1),labels=c("-.4","-.2","0",".2",".4"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
      axis.text.x =       element_text(size = base_size , colour = "black"), 
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90),
      legend.position = "none"
    )
}
base_size = 10
base_family = ""
face=d$cati[nrow(d):1]
p <- p + theme_economist(dkpanel = T,horizontal=T) + theme(
  axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
  axis.text.x =       element_text(size = base_size , colour = "black"), 
  axis.ticks =        element_line(colour = "grey50"),
  axis.title.y =      element_text(size = base_size,angle=90),
  legend.position = "none",panel.grid=element_line(size = .1)
) 
p
ggsave(paste("SEFig_PushModel_withGeo.pdf",sep=""),p,width=8,height=10)


# SI Figures s5 and s6: Push Model for Ties and No Ties ----------------------------------------------------
plotnames <- c("reg_movedrtol_free_1_x1b","reg_movedrtol_free_99_x1b")
filenames <- c("FreeCases","USTiesCases")
for(i in 1:2){
  setwd(paste(path,source,sep=""))
  
  plotname <- plotnames[i]
  d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)
  
  # name cols
  names(d) <- c("var","pe","se")
  # remove first row b and se
  d <- d[-1,]
  # remove obs
  d <- d[-nrow(d),]
  # remove const
  d <- d[-nrow(d),]
  # remove ref cats
  d <- d[d$se!=".",]
  
  # set up group var
  d$group="TBA"
  
  # placeholderrow
  emptyrow <-  c("holder",888,0,NA)
  offset <- "  "
  
  # relationship
  d <- filler(pos1="CHILD",pos2="SPOUSE",gname="Relationship",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Relationship"] <- 
    c("Relationship:",
      paste(offset,c("Principal Applicant","Child","Other","Parent","Sibling","Spouse")),
      paste(rep(" ",1),collapse=""))
  
  # gender
  d <- filler(pos1="F",gname="Gender",
              df=d,nr=emptyrow,above=0,below=1)
  d$var[d$group=="Gender"] <- c("Female",paste(rep(" ",2),collapse=""))
  
  # education
  d <- filler(pos1="Less than Secondary",pos2="University",gname="Education",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Education"] <- 
    c("Education:",
      paste(offset,c("None/Unknown","Primary","Secondary","Postsecondary","University")),
      paste(rep(" ",3),collapse=""))
  
  # origin
  d <- filler(pos1="AFGHANISTAN",pos2="VIETNAM",gname="Nationality",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Nationality"] <- c("Nationality:",
                                     paste(offset,c("Other ","Afghanistan","Bhutan","Bosnia And Herzegovina",
                                                    "Burma","Dem. Rep. Congo","Eritrea",
                                                    "Ethiopia","Iran","Iraq","Liberia",
                                                    "Russia","Somalia","Sudan","Ukraine","Vietnam"
                                     )),paste(rep(" ",4),collapse=""))
  
  # remove agency
  d <- d[ (d$var %in% c("HIAS","CWS","DFMS","ECDC","IOWA","IRC","KHRW","LIRS","RVTV","USCCB","USCRI","WR"))==FALSE ,]
  
  # age
  d <- filler(pos1="rage_n=1",pos2="rage_n=5",gname="Age",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Age"] <- c("Arrival Age:",
                             paste(offset,c("18-20","20s","30s","40s","50s","60+")),
                             paste(rep(" ",6),collapse=""))
  
  # remove arrival year
  d <- d[-(which(d$var=="rarrivefy_n=2001"):which(d$var=="rarrivefy_n=2014")),]
  
  # remove state
  data(state)
  d <- d[ (d$var %in% c(state.abb,"DC","PR","GU"))==FALSE ,]
  
  # Case size
  d <- filler(pos1="casesize_n=2",pos2="casesize_n=4",gname="Casesize",
              df=d,nr=emptyrow,above=2,below=0)
  d$var[d$group=="Casesize"] <- c("Family Size:",
                                  paste(offset,c("1","2","3","4+")))
  # remove LPR time
  d <- d[-(which(d$var=="grmonthstolpr_receipt=1"):which(d$var=="grmonthstolpr_receipt=4")),]
  
  # estimates
  d$pe <- as.numeric(d$pe)
  d$se <- as.numeric(d$se)
  d$upper <- d$pe + d$se*1.96
  d$lower <- d$pe - d$se*1.96
  
  # tag reference categories
  d$ref <- 0
  d$ref[grep("Other ",d$var)] <- 1
  d$ref[grep("Low",d$var)] <- 1
  d$ref[grep(" Medium Low",d$var)] <- 0
  d$ref[grep("Principal Applicant",d$var)] <- 1
  d$ref[grep("None/Unknown",d$var)] <- 1
  d$ref[grep("18-20",d$var)] <- 1
  d$ref[grep(" 1",d$var)] <- 1
  d$ref[grep("2000",d$var)] <- 1
  
  d$cati <- "plain"
  
  d$cati[which(d$var == "Relationship:")] <- "bold"
  d$cati[which(d$var == "Education:")] <- "bold"
  d$cati[which(d$var == "Female")] <- "bold"
  d$cati[which(d$var == "Nationality:")] <- "bold"
  d$cati[which(d$var == "Arrival Age:")] <- "bold"
  d$cati[which(d$var == "Family Size:")] <- "bold"
  
  
  d$pe[d$ref==1] <- 0
  d$ref <- factor(d$ref)
  d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])
  
  ## start plots  
  p = ggplot(d,aes(y=pe,x=var,colour=group,shape=ref))#,colour = Portfolio_type))
  p = p + coord_flip(ylim = c(-.4,.4))
  p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
  p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
  p = p + scale_y_continuous(name="Change in Probability of Moving Out of Arrival State",
                             breaks=round(seq(-.4,.4,.2),1),labels=c("-.4","-.2","0",".2",".4"))
  p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
  print(p)
  
  theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
    theme_grey(base_size = base_size, base_family = base_family) %+replace%
      theme(
        axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
        axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
        axis.ticks =        element_line(colour = "grey50"),
        axis.title.y =      element_text(size = base_size,angle=90,),
        legend.position = "none"
      )
  }
  base_size = 10
  base_family = ""
  face=d$cati[nrow(d):1]
  p <- p + theme_economist(dkpanel = T,horizontal=T) + theme(
    axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
    axis.text.x =       element_text(size = base_size , colour = "black"),
    axis.ticks =        element_line(colour = "grey50"),
    axis.title.y =      element_text(size = base_size,angle=90),
    legend.position = "none",panel.grid=element_line(size = .1)
  ) 
  p
  ggsave(paste("SIFig_PushModel_",filenames[i],".pdf",sep=""),p,width=8,height=8)
  
}

# SI Figure s7: Push Model for Principal Applicant ----------------------------------------------------
plotnames <- c("reg_movedrtol_relat_99_x1b")
filenames <- c("Principal_Applicant")
for(i in 1){
  setwd(paste(path,source,sep=""))
  
  plotname <- plotnames[i]
  d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)
  
  # name cols
  names(d) <- c("var","pe","se")
  # remove first row b and se
  d <- d[-1,]
  # remove obs
  d <- d[-nrow(d),]
  # remove const
  d <- d[-nrow(d),]
  # remove ref cats
  d <- d[d$se!=".",]
  
  # set up group var
  d$group="TBA"
  
  # placeholderrow
  emptyrow <-  c("holder",888,0,NA)
  offset <- "  "
  
  
  # gender
  d <- filler(pos1="F",gname="Gender",
              df=d,nr=emptyrow,above=0,below=1)
  d$var[d$group=="Gender"] <- c("Female",paste(rep(" ",2),collapse=""))
  
  # education
  d <- filler(pos1="Less than Secondary",pos2="University",gname="Education",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Education"] <- 
    c("Education:",
      paste(offset,c("None/Unknown","Primary","Secondary","Postsecondary","University")),
      paste(rep(" ",3),collapse=""))
  
  # origin
  d <- filler(pos1="AFGHANISTAN",pos2="VIETNAM",gname="Nationality",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Nationality"] <- c("Nationality:",
                                     paste(offset,c("Other ","Afghanistan","Bhutan","Bosnia And Herzegovina",
                                                    "Burma","Dem. Rep. Congo","Eritrea",
                                                    "Ethiopia","Iran","Iraq","Liberia",
                                                    "Russia","Somalia","Sudan","Ukraine","Vietnam"
                                     )),paste(rep(" ",4),collapse=""))
  
  # us ties
  d <- filler(pos1="free_n=1",gname="Ties",
              df=d,nr=emptyrow,above=0,below=1)
  d$var[d$group=="Ties"] <- c("No US Ties",paste(rep(" ",5),collapse=""))
  
  # remove agency
  d <- d[ (d$var %in% c("HIAS","CWS","DFMS","ECDC","IOWA","IRC","KHRW","LIRS","RVTV","USCCB","USCRI","WR"))==FALSE ,]
  
  # age
  d <- filler(pos1="rage_n=1",pos2="rage_n=5",gname="Age",
              df=d,nr=emptyrow,above=2,below=1)
  d$var[d$group=="Age"] <- c("Arrival Age:",
                             paste(offset,c("18-20","20s","30s","40s","50s","60+")),
                             paste(rep(" ",6),collapse=""))
  
  # remove arrival year
  d <- d[-(which(d$var=="rarrivefy_n=2001"):which(d$var=="rarrivefy_n=2014")),]
  
  # remove state
  data(state)
  d <- d[ (d$var %in% c(state.abb,"DC","PR","GU"))==FALSE ,]
  
  # remove LPR
  d <- d[-(which(d$var=="grmonthstolpr_receipt=1"):which(d$var=="grmonthstolpr_receipt=4")),]
  
  # Case size
  d <- filler(pos1="casesize_n=2",pos2="casesize_n=4",gname="Casesize",
              df=d,nr=emptyrow,above=2,below=0)
  d$var[d$group=="Casesize"] <- c("Family Size:",
                                  paste(offset,c("1","2","3","4+")))
  
  
  # estimates
  d$pe <- as.numeric(d$pe)
  d$se <- as.numeric(d$se)
  d$upper <- d$pe + d$se*1.96
  d$lower <- d$pe - d$se*1.96
  
  # tag reference categories
  d$ref <- 0
  d$ref[grep("Low",d$var)] <- 1
  d$ref[grep(" Medium Low",d$var)] <- 0
  d$ref[grep("Other ",d$var)] <- 1
  d$ref[grep("Principal Applicant",d$var)] <- 1
  d$ref[grep("None/Unknown",d$var)] <- 1
  d$ref[grep("18-20",d$var)] <- 1
  d$ref[grep(" 1",d$var)] <- 1
  d$ref[grep("2000",d$var)] <- 1
  
  d$cati <- "plain"
  
  d$cati[which(d$var == "Relationship:")] <- "bold"
  d$cati[which(d$var == "Education:")] <- "bold"
  d$cati[which(d$var == "Female")] <- "bold"
  d$cati[which(d$var == "Nationality:")] <- "bold"
  d$cati[which(d$var == "No US Ties")] <- "bold"
  d$cati[which(d$var == "Arrival Age:")] <- "bold"
  d$cati[which(d$var == "Arrival Year:")] <- "bold"
  d$cati[which(d$var == "Family Size:")] <- "bold"
  
  d$cati[which(d$var == "Share of Co-nationals:")] <- "bold"
  d$cati[which(d$var == "Cost of Housing:")] <- "bold"
  d$cati[which(d$var == "Unemployment:")] <- "bold"
  d$cati[which(d$var == "Welfare spending per poor:")] <- "bold"  
  d$cati[which(d$var == "GDP per capita:")] <- "bold"  
  d$cati[which(d$var == "Democratic Governor:")] <- "bold"  
  
  
  
  d$pe[d$ref==1] <- 0
  d$ref <- factor(d$ref)
  d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])
  
  ## start plots  
  p = ggplot(d,aes(y=pe,x=var,colour=group,shape=ref))#,colour = Portfolio_type))
  p = p + coord_flip(ylim = c(-.35,.35))
  p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
  p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
  p = p + scale_y_continuous(name="Change in Probability of Moving Out of Arrival State",
                             breaks=round(seq(-.4,.4,.2),1),labels=c("-.4","-.2","0",".2",".4"))
  p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
  print(p)
  
  theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
    theme_grey(base_size = base_size, base_family = base_family) %+replace%
      theme(
        axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
        axis.text.x =       element_text(size = base_size , colour = "black"),
        axis.ticks =        element_line(colour = "grey50"),
        axis.title.y =      element_text(size = base_size,angle=90),
        legend.position = "none"
      )
  }
  base_size = 10
  base_family = ""
  face=d$cati[nrow(d):1]
  p <- p + theme_economist(dkpanel = T,horizontal=T) + theme(
    axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
    axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
    axis.ticks =        element_line(colour = "grey50"),
    axis.title.y =      element_text(size = base_size,angle=90),
    legend.position = "none",panel.grid=element_line(size = .1)
  ) 
  p
  
  
  ### results folder
  setwd(paste(path,results,sep=""))
  ggsave(paste("SIFig_PushModel_",filenames[i],".pdf",sep=""),p,width=8,height=8)
  
}

# SI Figure s8: Push Model for all states ----------------------------------------------------
plotname <- "reg_movedrtol_x1b"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# relationship
d <- filler(pos1="CHILD",pos2="SPOUSE",gname="Relationship",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Relationship"] <- 
  c("Relationship:",
    paste(offset,c("Principal Applicant","Child","Other","Parent","Sibling","Spouse")),
    paste(rep(" ",1),collapse=""))

# gender
d <- filler(pos1="F",gname="Gender",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Gender"] <- c("Female",paste(rep(" ",2),collapse=""))

# education
d <- filler(pos1="Less than Secondary",pos2="University",gname="Education",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Education"] <- 
  c("Education:",
    paste(offset,c("None/Unknown","Primary","Secondary","Postsecondary","University")),
    paste(rep(" ",3),collapse=""))

# origin
d <- filler(pos1="AFGHANISTAN",pos2="VIETNAM",gname="Nationality",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Nationality"] <- c("Nationality:",
                                   paste(offset,c("Other ","Afghanistan","Bhutan","Bosnia And Herzegovina",
                                                  "Burma","Dem. Rep. Congo","Eritrea",
                                                  "Ethiopia","Iran","Iraq","Liberia",
                                                  "Russia","Somalia","Sudan","Ukraine","Vietnam"
                                   )),paste(rep(" ",4),collapse=""))

# us ties
d <- filler(pos1="free_n=1",gname="Ties",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Ties"] <- c("No US Ties",paste(rep(" ",5),collapse=""))

# remove agency
d <- d[ (d$var %in% c("HIAS","CWS","DFMS","ECDC","IOWA","IRC","KHRW","LIRS","RVTV","USCCB","USCRI","WR"))==FALSE ,]

# age
d <- filler(pos1="rage_n=1",pos2="rage_n=5",gname="Age",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Age"] <- c("Arrival Age:",
                           paste(offset,c("18-20","20s","30s","40s","50s","60+")),
                           paste(rep(" ",6),collapse=""))

# remove arrival year
d <- d[-(which(d$var=="rarrivefy_n=2001"):which(d$var=="rarrivefy_n=2014")),]

# remove state
data(state)
d <- d[ (d$var %in% c(state.abb,"DC","PR","GU"))==FALSE ,]

# Case size
d <- filler(pos1="casesize_n=2",pos2="casesize_n=4",gname="Casesize",
            df=d,nr=emptyrow,above=2,below=0)
d$var[d$group=="Casesize"] <- c("Family Size:",
                                paste(offset,c("1","2","3","4+")))

# drop time to LPR
d <- d[-(which(d$var=="grmonthstolpr_receipt=1"):which(d$var=="grmonthstolpr_receipt=4")),]


# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("Low",d$var)] <- 1
d$ref[grep(" Medium Low",d$var)] <- 0
d$ref[grep("Other ",d$var)] <- 1
d$ref[grep("Principal Applicant",d$var)] <- 1
d$ref[grep("None/Unknown",d$var)] <- 1
d$ref[grep("13-19",d$var)] <- 1
d$ref[grep(" 1",d$var)] <- 1
d$ref[grep("2000",d$var)] <- 1

d$cati <- "plain"

d$cati[which(d$var == "Relationship:")] <- "bold"
d$cati[which(d$var == "Education:")] <- "bold"
d$cati[which(d$var == "Female")] <- "bold"
d$cati[which(d$var == "Nationality:")] <- "bold"
d$cati[which(d$var == "No US Ties")] <- "bold"
d$cati[which(d$var == "Arrival Age:")] <- "bold"
d$cati[which(d$var == "Arrival Year:")] <- "bold"
d$cati[which(d$var == "Family Size:")] <- "bold"

d$cati[which(d$var == "Share of Co-nationals:")] <- "bold"
d$cati[which(d$var == "Cost of Housing:")] <- "bold"
d$cati[which(d$var == "Unemployment:")] <- "bold"
d$cati[which(d$var == "Welfare spending per poor:")] <- "bold"  
d$cati[which(d$var == "GDP per capita:")] <- "bold"  
d$cati[which(d$var == "Democratic Governor:")] <- "bold"  

d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.35,.35))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Change in Probability of Moving Out of Arrival State",
                           breaks=round(seq(-.4,.4,.2),1),labels=c("-.4","-.2","0",".2",".4"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
      axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90),
      legend.position = "none"
    )
}
base_size = 10
base_family = ""
face=d$cati[nrow(d):1]
p <- p + theme_economist(dkpanel = T,horizontal=T) + theme(
  axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
  axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
  axis.ticks =        element_line(colour = "grey50"),
  axis.title.y =      element_text(size = base_size,angle=90),
  legend.position = "none",panel.grid=element_line(size = .1)
) 
p
ggsave(paste("Fig2allstates.pdf",sep=""),p,width=8,height=8)


# S1 Figure s9: Push Model using only LPRs who ajusted within 24 months  ------------------------------------
plotname <- "reg_movedrtol24_bigstates_x1b"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# relationship
d <- filler(pos1="CHILD",pos2="SPOUSE",gname="Relationship",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Relationship"] <- 
  c("Relationship:",
    paste(offset,c("Principal Applicant","Child","Other","Parent","Sibling","Spouse")),
    paste(rep(" ",1),collapse=""))

# gender
d <- filler(pos1="F",gname="Gender",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Gender"] <- c("Female",paste(rep(" ",2),collapse=""))

# education
d <- filler(pos1="Less than Secondary",pos2="University",gname="Education",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Education"] <- 
  c("Education:",
    paste(offset,c("None/Unknown","Primary","Secondary","Postsecondary","University")),
    paste(rep(" ",3),collapse=""))

# origin
d <- filler(pos1="AFGHANISTAN",pos2="VIETNAM",gname="Nationality",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Nationality"] <- c("Nationality:",
                                   paste(offset,c("Other ","Afghanistan","Bhutan","Bosnia And Herzegovina",
                                                  "Burma","Dem. Rep. Congo","Eritrea",
                                                  "Ethiopia","Iran","Iraq","Liberia",
                                                  "Russia","Somalia","Sudan","Ukraine","Vietnam"
                                   )),paste(rep(" ",4),collapse=""))

# us ties
d <- filler(pos1="free_n=1",gname="Ties",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Ties"] <- c("No US Ties",paste(rep(" ",5),collapse=""))

# remove agency
d <- d[ (d$var %in% c("HIAS","CWS","DFMS","ECDC","IOWA","IRC","KHRW","LIRS","RVTV","USCCB","USCRI","WR"))==FALSE ,]

# age
d <- filler(pos1="rage_n=1",pos2="rage_n=5",gname="Age",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Age"] <- c("Arrival Age:",
                           paste(offset,c("18-20","20s","30s","40s","50s","60+")),
                           paste(rep(" ",6),collapse=""))

# remove arrival year
d <- d[-(which(d$var=="rarrivefy_n=2001"):which(d$var=="rarrivefy_n=2014")),]

# arrival year
# d <- filler(pos1="rarrivefy_n=2001",pos2="rarrivefy_n=2014",gname="Arrival Year",
#                df=d,nr=emptyrow,above=2,below=1)
#  d$var[d$group=="Arrival Year"] <- c("Arrival Year:",
#                                        paste(offset,c(2000:2015)),
#                                        paste(rep(" ",7),collapse=""))

# remove state
data(state)
d <- d[ (d$var %in% c(state.abb,"DC"))==FALSE ,]

# Case size
d <- filler(pos1="casesize_n=2",pos2="casesize_n=4",gname="Casesize",
            df=d,nr=emptyrow,above=2,below=0)
d$var[d$group=="Casesize"] <- c("Family Size:",
                                paste(offset,c("1","2","3","4+")))

# drop time to LPR
d <- d[-(which(d$var=="grmonthstolpr_receipt=1"):which(d$var=="grmonthstolpr_receipt=4")),]


# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("Low",d$var)] <- 1
d$ref[grep(" Medium Low",d$var)] <- 0
d$ref[grep("Other ",d$var)] <- 1
d$ref[grep("Principal Applicant",d$var)] <- 1
d$ref[grep("None/Unknown",d$var)] <- 1
d$ref[grep("13-19",d$var)] <- 1
d$ref[grep(" 1",d$var)] <- 1
d$ref[grep("2000",d$var)] <- 1

d$cati <- "plain"

d$cati[which(d$var == "Relationship:")] <- "bold"
d$cati[which(d$var == "Education:")] <- "bold"
d$cati[which(d$var == "Female")] <- "bold"
d$cati[which(d$var == "Nationality:")] <- "bold"
d$cati[which(d$var == "No US Ties")] <- "bold"
d$cati[which(d$var == "Arrival Age:")] <- "bold"
d$cati[which(d$var == "Arrival Year:")] <- "bold"
d$cati[which(d$var == "Family Size:")] <- "bold"

d$cati[which(d$var == "Share of Co-nationals:")] <- "bold"
d$cati[which(d$var == "Cost of Housing:")] <- "bold"
d$cati[which(d$var == "Unemployment:")] <- "bold"
d$cati[which(d$var == "Welfare spending per poor:")] <- "bold"  
d$cati[which(d$var == "GDP per capita:")] <- "bold"  
d$cati[which(d$var == "Democratic Governor:")] <- "bold"  



d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.35,.35))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Change in Probability of Moving Out of Arrival State",
                           breaks=round(seq(-.4,.4,.2),1),labels=c("-.4","-.2","0",".2",".4"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
      axis.text.x =       element_text(size = base_size , colour = "black"),
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90),
      legend.position = "none"
    )
}
base_size = 10
base_family = ""
face=d$cati[nrow(d):1]
p <- p + theme_economist(dkpanel = T,horizontal=T) + theme(
  axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
  axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
  axis.ticks =        element_line(colour = "grey50"),
  axis.title.y =      element_text(size = base_size,angle=90),
  legend.position = "none",panel.grid=element_line(size = .1)
) 
p
ggsave(paste("Fig2bigstates_below24months.pdf",sep=""),p,width=8,height=8)


# SI Figure s10: Gravity Model with All states -------------------------------------------------
plotname <- "gravity_main_allstates"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]
d <- d[d$pe!="0",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# Co share
cats <- c("1st quintile","2nd quintile","3rd quintile","4th quintile","5th quintile")

d <- filler(pos1="1.gr_l_coshare",pos2="4.gr_l_coshare",gname="Destination State: Share of Co-nationals",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Share of Co-nationals"] <- c("Share of Co-nationals:"," Destination State",
                                                                paste(offset,paste(cats,paste(rep(" ",1),collapse="")
                                                                )),
                                                                paste(rep(" ",1),collapse=""))

d <- filler(pos1="1.gr_r_coshare",pos2="4.gr_r_coshare",gname="Arrival State: Share of Co-nationals",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Share of Co-nationals"] <- c(" Arrival State",
                                                            paste(offset,paste(cats,paste(rep(" ",2),collapse="")
                                                            )),
                                                            paste(rep(" ",2),collapse=""))

# cost of housing
d <- filler(pos1="1.gr_l_col_housing",pos2="4.gr_l_col_housing",gname="Destination State: Cost of Housing",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Cost of Housing"] <- c("Cost of Housing:"," Destination State ",
                                                          paste(offset,paste(cats,paste(rep(" ",3),collapse="")
                                                          )),
                                                          paste(rep(" ",3),collapse=""))

d <- filler(pos1="1.gr_r_col_housing",pos2="4.gr_r_col_housing",gname="Arrival State: Cost of Housing",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Cost of Housing"] <- c(" Arrival State ",
                                                      paste(offset,paste(cats,paste(rep(" ",4),collapse="")
                                                      )),
                                                      paste(rep(" ",4),collapse=""))

# unemployment 
d <- filler(pos1="1.gr_l_unemployment",pos2="4.gr_l_unemployment",gname="Destination State: Unemployment",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Unemployment"] <- c("Unemployment:"," Destination State  ",
                                                       paste(offset,paste(cats,paste(rep(" ",5),collapse="")
                                                       )),
                                                       paste(rep(" ",5),collapse=""))

d <- filler(pos1="1.gr_r_unemployment",pos2="4.gr_r_unemployment",gname="Arrival State: Unemployment",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Unemployment"] <- c(" Arrival State  ",
                                                   paste(offset,paste(cats,paste(rep(" ",6),collapse="")
                                                   )),
                                                   paste(rep(" ",6),collapse=""))


# welfare spending per poor
d <- filler(pos1="1.gr_l_expend_welfare_poorpop",pos2="4.gr_l_expend_welfare_poorpop",gname="Destination State: Welfare spending",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Welfare spending"] <- c("Welfare spending:"," Destination State   ",
                                                           paste(offset,paste(cats,paste(rep(" ",7),collapse="")
                                                           )),
                                                           paste(rep(" ",7),collapse=""))

d <- filler(pos1="1.gr_r_expend_welfare_poorpop",pos2="4.gr_r_expend_welfare_poorpop",gname="Arrival State: Welfare spending",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Welfare spending"] <- c(" Arrival State   ",
                                                       paste(offset,paste(cats,paste(rep(" ",8),collapse="")
                                                       )),
                                                       paste(rep(" ",8),collapse=""))

# GDP per capita
d <- filler(pos1="1.gr_l_realgdp_percap",pos2="4.gr_l_realgdp_percap",gname="Destination State: GDP per capita",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: GDP per capita"] <- c("GDP per capita:"," Destination State    ",
                                                         paste(offset,paste(cats,paste(rep(" ",9),collapse="")
                                                         )),
                                                         paste(rep(" ",9),collapse=""))

d <- filler(pos1="1.gr_r_realgdp_percap",pos2="4.gr_r_realgdp_percap",gname="Arrival State: GDP per capita",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: GDP per capita"] <- c(" Arrival State    ",
                                                     paste(offset,paste(cats,paste(rep(" ",10),collapse="")
                                                     )),
                                                     paste(rep(" ",10),collapse=""))

# govenor
d <- filler(pos1="l_govparty_dem",gname="Destination State: Governor Democrat",
            df=d,nr=emptyrow,above=1,below=1)
d$var[d$group=="Destination State: Governor Democrat"] <- c("Democratic Governor:"," Destination State     ",paste(rep(" ",11),collapse=""))

# govenor
d <- filler(pos1="r_govparty_dem",gname="Arrival State: Governor Democrat",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Arrival State: Governor Democrat"] <- c(" Arrival State     ",paste(rep(" ",12),collapse=""))


d <- d[-(which(d$var == "1.gr_lnarrivals"): which(d$var =="2014.year")),]


# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("1st quintile",d$var)] <- 1
#  d$ref[grep(" Medium Low",d$var)] <- 0

d$cati <- "plain"
d$cati[grep("Share",d$var)] <- "bold"
d$cati[grep("Cost",d$var)] <- "bold"
d$cati[grep("Unemploy",d$var)] <- "bold"
d$cati[grep("Welfare",d$var)] <- "bold"
d$cati[grep("GDP",d$var)] <- "bold"
d$cati[grep("Democrat",d$var)] <- "bold"

d$cati[grep("State",d$var)] <- "italic"

d$group2 <- "TBA"
d$group2[d$group %in% c("Destination State: Share of Co-nationals",
                        "Arrival State: Share of Co-nationals")] <- "Share of Co-nationals"

d$group2[d$group %in% c("Destination State: Cost of Housing",
                        "Arrival State: Cost of Housing")] <- "Cost of Housing"

d$group2[d$group %in% c("Destination State: Unemployment",
                        "Arrival State: Unemployment")] <- "Unemployment"

d$group2[d$group %in% c("Destination State: Welfare spending",
                        "Arrival State: Welfare spending")] <- "Welfare spending"

d$group2[d$group %in% c("Destination State: GDP per capita",
                        "Arrival State: GDP per capita")] <- "GDP per capita"

d$group2[d$group %in% c("Destination State: Governor Democrat",
                        "Arrival State: Governor Democrat")] <- "Governor Democrat"

d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group2,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.25,.25))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Percent Change in Secondary Migration Between States",
                           breaks=round(seq(-.2,.2,.1),1),labels=c("-20","-10","0","10","20"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
      axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90),
      legend.position = "none"
    )
}
base_size = 10
face=d$cati[nrow(d):1]
p <- p + theme_bw1(base_size = 10, base_family = "",face=d$cati[nrow(d):1])
p <- p +
  theme_economist(dkpanel = T,horizontal=T) + theme(
    axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
    axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
    axis.ticks =        element_line(colour = "grey50"),
    axis.title.y =      element_text(size = base_size,angle=90),
    legend.position = "none",panel.grid=element_line(size = .1)
  )
ggsave(paste("fig3allstates.pdf",sep=""),p,width=8,height=10)

# SI Figure s11: Gravity Model using only LPRs who ajusted within 24 months -------------------------------------------------
plotname <- "gravity_main_24months"
d <- read.csv(paste(plotname,".csv",sep=""),header=T, stringsAsFactors = FALSE)

# name cols
names(d) <- c("var","pe","se")
# remove first row b and se
d <- d[-1,]
# remove obs
d <- d[-nrow(d),]
# remove const
d <- d[-nrow(d),]
# remove ref cats
d <- d[d$se!=".",]
d <- d[d$pe!="0",]

# set up group var
d$group="TBA"

# placeholderrow
emptyrow <-  c("holder",888,0,NA)
offset <- "  "

# Co share
cats <- c("1st quintile","2nd quintile","3rd quintile","4th quintile","5th quintile")

d <- filler(pos1="1.gr_l_coshare",pos2="4.gr_l_coshare",gname="Destination State: Share of Co-nationals",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Share of Co-nationals"] <- c("Share of Co-nationals:"," Destination State",
                                                                paste(offset,paste(cats,paste(rep(" ",1),collapse="")
                                                                )),
                                                                paste(rep(" ",1),collapse=""))

d <- filler(pos1="1.gr_r_coshare",pos2="4.gr_r_coshare",gname="Arrival State: Share of Co-nationals",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Share of Co-nationals"] <- c(" Arrival State",
                                                            paste(offset,paste(cats,paste(rep(" ",2),collapse="")
                                                            )),
                                                            paste(rep(" ",2),collapse=""))

# cost of housing
d <- filler(pos1="1.gr_l_col_housing",pos2="4.gr_l_col_housing",gname="Destination State: Cost of Housing",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Cost of Housing"] <- c("Cost of Housing:"," Destination State ",
                                                          paste(offset,paste(cats,paste(rep(" ",3),collapse="")
                                                          )),
                                                          paste(rep(" ",3),collapse=""))

d <- filler(pos1="1.gr_r_col_housing",pos2="4.gr_r_col_housing",gname="Arrival State: Cost of Housing",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Cost of Housing"] <- c(" Arrival State ",
                                                      paste(offset,paste(cats,paste(rep(" ",4),collapse="")
                                                      )),
                                                      paste(rep(" ",4),collapse=""))

# unemployment 
d <- filler(pos1="1.gr_l_unemployment",pos2="4.gr_l_unemployment",gname="Destination State: Unemployment",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Unemployment"] <- c("Unemployment:"," Destination State  ",
                                                       paste(offset,paste(cats,paste(rep(" ",5),collapse="")
                                                       )),
                                                       paste(rep(" ",5),collapse=""))

d <- filler(pos1="1.gr_r_unemployment",pos2="4.gr_r_unemployment",gname="Arrival State: Unemployment",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Unemployment"] <- c(" Arrival State  ",
                                                   paste(offset,paste(cats,paste(rep(" ",6),collapse="")
                                                   )),
                                                   paste(rep(" ",6),collapse=""))


# welfare spending per poor
d <- filler(pos1="1.gr_l_expend_welfare_poorpop",pos2="4.gr_l_expend_welfare_poorpop",gname="Destination State: Welfare spending",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: Welfare spending"] <- c("Welfare spending:"," Destination State   ",
                                                           paste(offset,paste(cats,paste(rep(" ",7),collapse="")
                                                           )),
                                                           paste(rep(" ",7),collapse=""))

d <- filler(pos1="1.gr_r_expend_welfare_poorpop",pos2="4.gr_r_expend_welfare_poorpop",gname="Arrival State: Welfare spending",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: Welfare spending"] <- c(" Arrival State   ",
                                                       paste(offset,paste(cats,paste(rep(" ",8),collapse="")
                                                       )),
                                                       paste(rep(" ",8),collapse=""))

# GDP per capita
d <- filler(pos1="1.gr_l_realgdp_percap",pos2="4.gr_l_realgdp_percap",gname="Destination State: GDP per capita",
            df=d,nr=emptyrow,above=3,below=1)
d$var[d$group=="Destination State: GDP per capita"] <- c("GDP per capita:"," Destination State    ",
                                                         paste(offset,paste(cats,paste(rep(" ",9),collapse="")
                                                         )),
                                                         paste(rep(" ",9),collapse=""))

d <- filler(pos1="1.gr_r_realgdp_percap",pos2="4.gr_r_realgdp_percap",gname="Arrival State: GDP per capita",
            df=d,nr=emptyrow,above=2,below=1)
d$var[d$group=="Arrival State: GDP per capita"] <- c(" Arrival State    ",
                                                     paste(offset,paste(cats,paste(rep(" ",10),collapse="")
                                                     )),
                                                     paste(rep(" ",10),collapse=""))

# govenor
d <- filler(pos1="l_govparty_dem",gname="Destination State: Governor Democrat",
            df=d,nr=emptyrow,above=1,below=1)
d$var[d$group=="Destination State: Governor Democrat"] <- c("Democratic Governor:"," Destination State     ",paste(rep(" ",11),collapse=""))

# govenor
d <- filler(pos1="r_govparty_dem",gname="Arrival State: Governor Democrat",
            df=d,nr=emptyrow,above=0,below=1)
d$var[d$group=="Arrival State: Governor Democrat"] <- c(" Arrival State     ",paste(rep(" ",12),collapse=""))


d <- d[-(which(d$var == "1.gr_lnarrivals"): which(d$var =="2014.year")),]


# estimates
d$pe <- as.numeric(d$pe)
d$se <- as.numeric(d$se)
d$upper <- d$pe + d$se*1.96
d$lower <- d$pe - d$se*1.96

# tag reference categories
d$ref <- 0
d$ref[grep("1st quintile",d$var)] <- 1
#  d$ref[grep(" Medium Low",d$var)] <- 0

d$cati <- "plain"
d$cati[grep("Share",d$var)] <- "bold"
d$cati[grep("Cost",d$var)] <- "bold"
d$cati[grep("Unemploy",d$var)] <- "bold"
d$cati[grep("Welfare",d$var)] <- "bold"
d$cati[grep("GDP",d$var)] <- "bold"
d$cati[grep("Democrat",d$var)] <- "bold"

d$cati[grep("State",d$var)] <- "italic"

d$group2 <- "TBA"
d$group2[d$group %in% c("Destination State: Share of Co-nationals",
                        "Arrival State: Share of Co-nationals")] <- "Share of Co-nationals"

d$group2[d$group %in% c("Destination State: Cost of Housing",
                        "Arrival State: Cost of Housing")] <- "Cost of Housing"

d$group2[d$group %in% c("Destination State: Unemployment",
                        "Arrival State: Unemployment")] <- "Unemployment"

d$group2[d$group %in% c("Destination State: Welfare spending",
                        "Arrival State: Welfare spending")] <- "Welfare spending"

d$group2[d$group %in% c("Destination State: GDP per capita",
                        "Arrival State: GDP per capita")] <- "GDP per capita"

d$group2[d$group %in% c("Destination State: Governor Democrat",
                        "Arrival State: Governor Democrat")] <- "Governor Democrat"

d$pe[d$ref==1] <- 0
d$ref <- factor(d$ref)
d$var <- factor(d$var,levels=unique(d$var)[length(d$var):1])

## start plots  
p = ggplot(d,aes(y=pe,x=var,colour=group2,shape=ref))#,colour = Portfolio_type))
p = p + coord_flip(ylim = c(-.25,.25))
p = p + geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") 
p = p + geom_pointrange(aes(ymin=lower,ymax=upper),position=position_dodge(width=.1),size=.6)
p = p + scale_y_continuous(name="Percent Change in Secondary Migration Between States",
                           breaks=round(seq(-.2,.2,.1),1),labels=c("-20","-10","0","10","20"))
p = p + scale_colour_discrete("Attribute:") + scale_x_discrete(name="") + scale_shape_manual(name="",values=c(16,1))
print(p)

theme_bw1 <- function(base_size = 10, base_family = "", face="plain") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
      axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90),
      legend.position = "none"
    )
}
base_size = 10
face=d$cati[nrow(d):1]
p <- p + theme_bw1(base_size = 10, base_family = "",face=d$cati[nrow(d):1])
p <- p +
  theme_economist(dkpanel = T,horizontal=T) + theme(
    axis.text.y =       element_text(size = base_size ,face= face , colour = "black"),
    axis.text.x =       element_text(size = base_size , colour = "black"), # changes position of X axis text
    axis.ticks =        element_line(colour = "grey50"),
    axis.title.y =      element_text(size = base_size,angle=90),
    legend.position = "none",panel.grid=element_line(size = .1)
  )
ggsave(paste("fig3_below24month.pdf",sep=""),p,width=8,height=10)


# SI: Figure s12 Move Out Rate by Arrival Year -------------------------------------
rm(list=ls())

## function to prep data for descriptive plots
dataprepdesc <- function(name = NA){
  d <- read.csv(paste(name,sep=""),stringsAsFactors = FALSE)
  colnames(d) <- c("group","mean","obs")
  d$mean <- d$mean*100
  return(d)
}

# by arrivals year
d <- dataprepdesc("crosstab_movedrtol_rarrivefy.csv")
for(i in 1:nrow(d)){
  d$groupname[i] <- paste(d$group[i],"\n(N=",d$obs[i],")",sep="")
}
d$groupname <- factor(d$groupname)

ggplot(d,aes(x=groupname,y=mean)) + geom_bar(stat="identity",position = "dodge",width=0.35,fill="indianred1") + coord_flip() +
  theme_economist(dkpanel = TRUE) + ylim(0,50) +
  theme(axis.title.y = element_blank(),
        axis.title.x = element_text(size=13),axis.text=element_text(size=13)) + ylab("Moved Out of State (%)")
ggsave("SIFig_Move_Rates_by_rarrivefy.pdf",width=11,height=8)
